#### setup ####

# Remove every object that ls() lists in the current environment, so the
# script starts from an empty workspace.
# NOTE(review): this deletes whatever the caller had in the workspace and
# does not detach packages or reset options -- consider running the script
# in a fresh R session instead of relying on this line.
rm(list = ls())

# Report the current working directory (auto-printed when run interactively);
# the relative data path used further down is resolved against it.
getwd()

#setwd()     # uncomment to set working directory

library(haven)
library(rio)
library(ggplot2)

#### importing and preparing data ####

# District-level data (Habsburg districts), read from a Stata .dta file
# given as a path relative to the working directory.
distr <- rio::import(file = "the-promise_districtvars.dta")




#### turnout plots with different point shapes by dominant ethnic group ####

# Majority dummies: 1 when the group's share (s_cze / s_ger) exceeds 0.5,
# 0 otherwise. A missing share gives NA at this stage, so the summaries
# below still show how many rows have no share recorded.
distr$cze_maj <- ifelse(distr$s_cze > 0.5, 1, 0)    # = 1 in case of czech majority
summary(distr$cze_maj)

distr$ger_maj <- ifelse(distr$s_ger > 0.5, 1, 0)    # = 1 in case of german majority
summary(distr$ger_maj)

# Recode NA to 0 so that every row falls into exactly one category below.
# NOTE(review): a district whose shares are both missing therefore ends up
# in the "none" category -- confirm that this is intended.
distr$cze_maj[is.na(distr$cze_maj)] <- 0
distr$ger_maj[is.na(distr$ger_maj)] <- 0

# = 1 when the district has neither a czech nor a german majority
distr$no_maj <- ifelse(distr$cze_maj == 0 & distr$ger_maj == 0, 1, 0)


# Visual check that the three dummies are consistent with each other.
data.frame(distr$cze_maj, distr$ger_maj, distr$no_maj)

# Character label for the type of majority ("cze", "ger" or "none").
# The czech dummy is tested first, so it takes precedence if both dummies
# were ever 1 at the same time.
distr$maj <- ifelse(
  distr$cze_maj == 1,
  "cze",
  ifelse(distr$ger_maj == 1, "ger", "none")
)



## plotting voter turnout against industrial workers as share of qualified voters

# Scatter plot with one point per row of `distr`; the point symbol encodes
# the majority label stored in `maj`. The axis limits are set through
# coord_cartesian(), which zooms the view without dropping observations.
turnout_plot <- ggplot(distr, aes(s_indworker, turnout)) +
  geom_point(mapping = aes(shape = maj)) +
  scale_shape_manual(
    values = c(1, 19, 3),
    name = "majority",
    breaks = c("cze", "ger", "none"),
    labels = c("czech", "german", "other")
  ) +
  labs(x = "industrial workers as share of qualified voters") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", hjust = 0.5)
  ) +
  coord_cartesian(xlim = c(0, 0.8), ylim = c(0.3, 1))


turnout_plot
